Data

Data sources

On 2020-07-20, I searched for all articles with Publication Name = American Naturalist in the Web of Science Core Collection. There were 11029 results. I manually exported all results in batches of 500 (the WoS export limit when the abstract is also exported), as Tab-delimited (Mac). I collated the downloaded files and opened the merged file in Excel; I removed empty columns, renamed columns, and saved the file as .csv.

The source files are in https://github.com/flodebarre/2020_AmNatHistory/tree/master/data/sourcesWOS/withAbstract.

Load data

# Load the collated Web of Science export; the path is relative to the
# directory this document is run from. Text columns are kept as character
# vectors rather than being converted to factors.
allArticles <- read.csv("../data/AmNat_allAbstracts.csv", stringsAsFactors = FALSE)

Explore data

Number of articles loaded: 11029 items in the dataset.

# Colour palette used by the plots below: six colours from the ColorBrewer
# "Set2" palette, the first of which is the default point colour.
library(RColorBrewer)
cols <- brewer.pal(6, "Set2")
maincol <- cols[1]

Number of articles per year:

# Count the items published each year (one Authors entry per item)
nb.byY <- aggregate(allArticles$Authors,
                    by = list(allArticles$PublicationYear),
                    FUN = length)

# Yearly counts as points; the axes are drawn by hand so that they cross
# at (1899, 0)
par(las = 1)
plot(x = nb.byY$Group.1, y = nb.byY$x,
     pch = 16, col = maincol,
     ylim = c(0, max(nb.byY$x)),
     xlab = "Publication Year", ylab = "Number of items",
     axes = FALSE)
axis(side = 1, pos = 0)
axis(side = 2, pos = 1900-1)

Articles with abstract:

# Add information about number of characters in the abstract of each article
# (NA when the abstract itself is missing). The column is assigned by name
# rather than with cbind() so that re-running this chunk does not append a
# second AbsLength column.
allArticles$AbsLength <- nchar(allArticles$Abstract)

# Compute proportion of articles with non zero abstract length, by year.
# An abstract counts only if it is neither missing (NA) nor an empty string.
isAbs.byY <- aggregate(!is.na(allArticles$AbsLength) & allArticles$AbsLength > 0,
                       by = list(allArticles$PublicationYear), FUN = mean)

# Plot the result
# (ylim is fixed to [0, 1] so that the x axis drawn at y = 0 always sits at
# the bottom of the plotting region, whatever the range of the data)
par(las = 1)
plot(isAbs.byY$Group.1, isAbs.byY$x, 
     xlab = "Publication Year", ylab = "Proportion of items with non zero abstract length", 
     ylim = c(0, 1), 
     pch = 16, col = cols[2], 
     axes = FALSE)
axis(1, pos = 0)
axis(2, pos = 1900-1)

Sample sizes, items with abstracts, per year

# Count the articles with non zero abstract length, by year
# (an abstract counts only if it is neither missing nor an empty string).
# NB: this overwrites the per-year proportions previously stored in isAbs.byY.
isAbs.byY <- aggregate(!is.na(allArticles$Abstract) & nzchar(allArticles$Abstract),
                       by = list(allArticles$PublicationYear), FUN = sum)

# Plot the result
# (ylim starts at 0 so that the x axis drawn at y = 0 sits at the bottom of
# the plotting region)
par(las = 1)
plot(isAbs.byY$Group.1, isAbs.byY$x, 
     xlab = "Publication Year", ylab = "Number of items with non zero abstract length", 
     ylim = c(0, max(isAbs.byY$x)), 
     pch = 16, col = cols[2], 
     axes = FALSE)
axis(1, pos = 0)
axis(2, pos = 1900-1)

Subset of the data for which there are abstracts.

# Keep only the items that actually have an abstract: the text must be neither
# missing (NA) nor an empty string, otherwise empty abstracts would inflate
# the denominators of the per-year proportions computed below.
absArticles <- allArticles[!is.na(allArticles$Abstract) & nzchar(allArticles$Abstract), ]

Check distribution of lengths of abstracts

# Histogram of abstract lengths in characters, in bins of 50 characters;
# the axes are drawn by hand so that they cross at the origin
par(las = 1)
nCA <- nchar(absArticles$Abstract)
hist(nCA,
     breaks = seq(0, max(nCA)+50, by = 50),
     col = cols[3], border = "white",
     main = "Distribution of the number of characters in abstracts",
     xlab = "Number of characters",
     axes = FALSE)
axis(side = 1, pos = 0)
axis(side = 2, pos = 0)

Count the number of words

# Number of words per abstract. Abstracts are trimmed and split on runs of
# whitespace, so that repeated spaces, tabs or line breaks do not produce
# empty "words" that would inflate the count.
nbWordsAbs <- lengths(strsplit(trimws(absArticles$Abstract), "[[:space:]]+"))

# Histogram in bins of 10 words; the axes are drawn by hand so that they cross
# at the origin
hist(nbWordsAbs, main = "Distribution of the number of words in abstracts", 
     xlab = "Number of words", breaks = seq(0, max(nbWordsAbs)+50, by = 10), col = cols[3], border = "white", 
     axes = FALSE)
axis(1, pos = 0)
axis(2, pos = 0)

There are 6636 items with an abstract.

Search for words

Function to find a specific word in an abstract

# Does the abstract stored in row(s) `line` of `absArticles` match `word`?
#   word: pattern to look for; it is passed to grepl() as a regular
#         expression and matched case-insensitively
#   line: row index (or indices) into the global data frame `absArticles`
# Returns a logical vector with one element per requested row.
findWord <- function(word, line) {
  abstractText <- absArticles[line, "Abstract"]
  grepl(word, abstractText, ignore.case = TRUE)
}

Function to count occurrences of a specific word
NB: the proportions are calculated among articles for which there are abstracts

# Flag the abstracts that contain `word` and plot, for each publication year,
# the proportion of abstracts containing it.
#   word: pattern to look for (see findWord(): regular expression, matched
#         case-insensitively)
#   ...:  currently unused; kept so that existing calls remain valid
# Reads the global data frame `absArticles` and the global colour `maincol`.
# Side effects: sets par(las = 1) and draws a new plot.
# Returns a logical vector with one element per row of `absArticles`
# (TRUE where the abstract contains `word`).
countWord <- function(word, ...){
  # findWord() is vectorised over rows, so all abstracts are tested in a
  # single call instead of one grepl() call and one row lookup per abstract
  v <- findWord(word, seq_len(nrow(absArticles)))
  
  # Proportion of abstracts containing the word, by publication year
  word.byY <- aggregate(v, by = list(absArticles$PublicationYear), FUN = mean, na.rm = TRUE)
  
  par(las = 1)
  plot(range(word.byY$Group.1), range(word.byY$x), # Initialize the plot
       ylim = c(0, 1), 
       xlab = "Publication Year", ylab = paste0("Proportion of abstracts containing the word `", word, "`"), 
       main = word,
       axes = FALSE,
       type = "n")
  #rect(1900-1, 0, 2020, 1, col = gray(0.9), border = gray(0, 0))
  
  #for(i in seq(0.1, 1, by = 0.1)){
  #  lines(c(1900-1, 2020), rep(i, 2), col = gray(0.95), lwd = 1)
  #}
  
  # Axes are drawn by hand: x axis at y = 0, y axis at x = 1899
  axis(1, pos = 0)
  axis(2, pos = 1900-1)
  
  # Add the points
  # (doing this so that the points are above the x axis...)
  points(word.byY$Group.1, word.byY$x, pch = 16, col = maincol) 
  
  v
}

Model

absArticles$wordModel <- countWord("model")

For comparison, Chris’ figure

knitr::include_graphics("pics/figCM.png")

xx <- countWord("model organism")

xx <- countWord("model system")

Theor*

absArticles$wordTheor <- countWord("theor")

Theory

xx <- countWord("theory")

Theoretical

xx <- countWord("theoretical")

Conceptual

absArticles$wordConceptual <- countWord("conceptual")

Equation

absArticles$wordEquation <- countWord("equation")

Analy*

absArticles$wordAnaly <- countWord("analy")

xx <- countWord("analytic")

xx <- countWord("analyze")

xx <- countWord("analyse")

xx <- countWord("analytical")

xx <- countWord("analytical solution")

Simulat*

absArticles$wordSimulat <- countWord("simulat")

Quantitative

absArticles$wordQuantitative <- countWord("quantitative")

Experiment

absArticles$wordExperiment <- countWord("experiment")

Citations

# Add an alpha (opacity) channel to one or more colours.
#   someColor: vector of colours in any form accepted by col2rgb()
#              (colour names, hex strings, palette indices)
#   alpha:     opacity on a 0-255 scale (0 = fully transparent,
#              255 = opaque); either a single value applied to every colour,
#              or one value per colour
# Returns a character vector of "#RRGGBBAA" strings, one per input colour,
# carrying the names of `someColor` if it has any.
makeTransparent <- function(someColor, alpha = 100) {
  # 3 x n integer matrix with rows "red", "green", "blue"
  rgbValues <- col2rgb(someColor)
  # rgb() is vectorised, so all colours are converted in one call
  rgb(red = rgbValues["red", ],
      green = rgbValues["green", ],
      blue = rgbValues["blue", ],
      alpha = alpha,
      names = colnames(rgbValues),
      maxColorValue = 255)
}

Average per year and per type

par(las = 1)

# Mean number of citations for each (publication year, abstract mentions
# "model") combination
TC.byY.type <- aggregate(absArticles$TimesCitedWOS,
                         by = list(absArticles$PublicationYear, absArticles$wordModel),
                         FUN = mean)

# One colour per aggregated row: cols[3] when the abstracts mention "model",
# cols[4] otherwise; then made slightly transparent
colsModel <- ifelse(TC.byY.type$Group.2, cols[3], cols[4])
colsModelTrp <- makeTransparent(colsModel, 200)

plot(x = TC.byY.type$Group.1, y = TC.byY.type$x,
     pch = 16, col = colsModelTrp,
     xlim = c(1940, 2020),
     xlab = "Year", ylab = "Average number of citations")
legend(2000, 600, legend = c("Model", "no Model"), col = c(cols[3], cols[4]), pch = 16)

par(las = 1)

# One colour per article: cols[3] when the abstract mentions "model",
# cols[4] otherwise; then made transparent so overlapping points stay visible
colsModel <- ifelse(absArticles$wordModel, cols[3], cols[4])
colsModelTrp <- makeTransparent(colsModel, 150)

# Citations per article on a log scale (+1 so that uncited items can be shown)
plot(x = absArticles$PublicationYear, y = absArticles$TimesCitedWOS+1,
     log = "y", pch = 16, col = colsModelTrp,
     xlim = c(1955, 2020),
     xlab = "Year", ylab = "Number of citations (+1), log scale")

# Same data on a linear scale, with a legend
plot(x = absArticles$PublicationYear, y = absArticles$TimesCitedWOS,
     log = "", pch = 16, col = colsModelTrp,
     xlim = c(1955, 2020),
     xlab = "Year", ylab = "Number of citations")
legend(2010, 5000, legend = c("Model", "no Model"), col = c(cols[3], cols[4]), pch = 16)

Citations, all items (including those without abstracts)

par(las = 1)
plot(allArticles$PublicationYear, allArticles$TimesCitedWOS, col = gray(0.7), log = "", xlim = c(1955, 2020), pch = 16, xlab = "Year", ylab = "Number of citations")

Show articles with more than 2000 citations

# Items cited more than 2000 times, most cited first.
# which() drops items whose citation count is NA (a bare logical filter would
# return all-NA rows for them), and order() returns positions in xx itself,
# whereas sort(..., index.return = TRUE) indexes into the vector with NAs
# removed and would misalign the rows if any count were missing.
xx <- allArticles[which(allArticles$TimesCitedWOS > 2000), c("TimesCitedWOS", "Authors", "Title")]
ixx <- order(xx$TimesCitedWOS, decreasing = TRUE)
xx[ixx, ]
##      TimesCitedWOS                                Authors
## 7002          7171                                 NEI, M
## 5463          6386                         FELSENSTEIN, J
## 7360          3403                              PAINE, RT
## 4925          3340                            PULLIAM, HR
## 6538          3090                              GRIME, JP
## 7085          2837                             JANZEN, DH
## 7308          2617              MACARTHUR, RH; PIANKA, ER
## 6541          2586               CONNELL, JH; SLATYER, RO
## 7674          2522                         HUTCHINSON, GE
## 7301          2412                           WILLIAMS, GC
## 7258          2406                 MACARTHUR R; LEVINS, R
## 6417          2252                              HUSTON, M
## 7075          2228                             PIANKA, ER
## 7599          2131 HAIRSTON, NG; SMITH, FE; SLOBODKIN, LB
## 6846          2093                SMITH, CC; FRETWELL, SD
##                                                                                                                     Title
## 7002                                                                                 GENETIC DISTANCE BETWEEN POPULATIONS
## 5463                                                                               PHYLOGENIES AND THE COMPARATIVE METHOD
## 7360                                                                            FOOD WEB COMPLEXITY AND SPECIES DIVERSITY
## 4925                                                                            SOURCES, SINKS, AND POPULATION REGULATION
## 6538 EVIDENCE FOR EXISTENCE OF THREE PRIMARY STRATEGIES IN PLANTS AND ITS RELEVANCE TO ECOLOGICAL AND EVOLUTIONARY THEORY
## 7085                                                        HERBIVORES AND THE NUMBER OF TREE SPECIES IN TROPICAL FORESTS
## 7308                                                                               ON OPTIMAL USE OF A PATCHY ENVIRONMENT
## 6541               MECHANISMS OF SUCCESSION IN NATURAL COMMUNITIES AND THEIR ROLE IN COMMUNITY STABILITY AND ORGANIZATION
## 7674                                                    HOMAGE TO SANTA-ROSALIA OR WHY ARE THERE SO MANY KINDS OF ANIMALS
## 7301                                          NATURAL SELECTION COSTS OF REPRODUCTION AND A REFINEMENT OF LACKS PRINCIPLE
## 7258                                                 LIMITING SIMILARITY CONVERGENCE AND DIVERGENCE OF COEXISTING SPECIES
## 6417                                                                              GENERAL HYPOTHESIS OF SPECIES-DIVERSITY
## 7075                                                                                          R-SELECTION AND K-SELECTION
## 7599                                                             COMMUNITY STRUCTURE, POPULATION CONTROL, AND COMPETITION
## 6846                                                                 OPTIMAL BALANCE BETWEEN SIZE AND NUMBER OF OFFSPRING